#!/bin/bash
#
# Prepares a per-genome working copy of the normed matrix (with the given
# genome excluded) and then runs the annotation step for that genome.
#
# Usage: <this script> GENOME_ID
#
# Helper scripts are invoked through relative paths (4_normed_matrix/, tools/,
# 6_annotation/), so run this from the directory that contains them.

set -o errexit
# Without pipefail only the last stage of a pipeline decides its exit status,
# so a failing zcat/filter stage would go unnoticed and the script would carry
# on (and delete intermediate files) with truncated data.
set -o pipefail
export LC_ALL=C

export TMP_DATA_PATH=/tmp/mixagol/annotation_cluster

export BASE_DATA_PATH=/home/mixagol/work/data
# export BASE_DATE=20111030
export BASE_DATE=20120802

# An empty genome id would make AN_DIR resolve to the parent directory, which
# passes the -d check below; reject it up front.
if [[ -z "${1:-}" ]]; then
    echo "Usage: ${0##*/} GENOME_ID" >&2
    exit 1
fi

export GENOME_ID="$1"
# The tilde is left unquoted so that it still expands to the home directory.
export AN_DIR=~/work/data/annotation_cluster_1K/"${GENOME_ID}"

if [[ ! -d "${AN_DIR}" ]]; then
    # >&2 duplicates the existing descriptor; "> /dev/stderr" reopens it and
    # truncates the log when stderr is redirected to a regular file.
    echo "Directory for annotation genome ${GENOME_ID} does not exists!" >&2
    exit 1
fi

# Record which machine is processing this genome.
hostname > "${AN_DIR}/worker_hostname"

##
## NORM MATRIX
##
# Builds ${CUR_DATA_DIR}/matrix_trans_${Z_VALUE}_.db in four steps, removing
# each intermediate file once the next step has consumed it:
#   1. matrix.txt.gz          - raw results with columns 1 and 2 swapped, run
#                               through apply_maps.py with GENOME_ID excluded
#   2. matrix_raw_<Z>_.txt.gz - rows whose third column is >= Z_VALUE, reduced
#                               to the first two columns
#   3. matrix_trans_<Z>_.txt.gz - output of add_vec_len.py, columns swapped,
#                               numerically sorted on column 1, then passed
#                               through create_matrix.py
#   4. matrix_trans_<Z>_.db   - written by tools/create_bsddb.py
# All expansions are quoted so paths containing whitespace or glob characters
# are passed through intact.
export CUR_DATA_DIR="${TMP_DATA_PATH}/4_normed_matrix/${BASE_DATE}/wu_blast_nw_excl_${GENOME_ID}"
mkdir -p -- "${CUR_DATA_DIR}"

# Work on local copies of the id maps; apply_maps.py is pointed at these.
cp -- "${BASE_DATA_PATH}/4_normed_matrix/${BASE_DATE}/wu_blast_nw/map_int_gene.txt"  "${CUR_DATA_DIR}/"
cp -- "${BASE_DATA_PATH}/4_normed_matrix/${BASE_DATE}/wu_blast_nw/map_int_genom.txt" "${CUR_DATA_DIR}/"

zcat "${BASE_DATA_PATH}/3_raw_matrix_nw/${BASE_DATE}/wu_blast/results.txt.gz" \
    | awk -F'\t' '{print $2"\t"$1"\t"$3}' \
    | 4_normed_matrix/apply_maps.py \
            --genes-map "${CUR_DATA_DIR}/map_int_gene.txt" \
            --genomes-map "${CUR_DATA_DIR}/map_int_genom.txt" \
            --excl-genomes "${GENOME_ID}" \
    | gzip \
    > "${CUR_DATA_DIR}/matrix.txt.gz"

export Z_VALUE=5

# The threshold is handed to awk with -v instead of being spliced into the
# program text; "+ 0" forces it to a number, matching the numeric literal the
# original program compared against.
zcat "${CUR_DATA_DIR}/matrix.txt.gz" \
    | awk -F'\t' -v min_z="${Z_VALUE}" '$3 >= min_z + 0 {print $1"\t"$2}' \
    | gzip \
    > "${CUR_DATA_DIR}/matrix_raw_${Z_VALUE}_.txt.gz"

rm -- "${CUR_DATA_DIR}/matrix.txt.gz"

# sort spills its temporary files into CUR_DATA_DIR (-T) and uses a 500M
# in-memory buffer (-S).
zcat "${CUR_DATA_DIR}/matrix_raw_${Z_VALUE}_.txt.gz" \
    | 4_normed_matrix/add_vec_len.py \
    | awk -F'\t' '{print $2"\t"$1}' \
    | sort -t$'\t' -n -k1,1 -S500M -T "${CUR_DATA_DIR}" \
    | 4_normed_matrix/create_matrix.py \
    | gzip \
    > "${CUR_DATA_DIR}/matrix_trans_${Z_VALUE}_.txt.gz"

rm -- "${CUR_DATA_DIR}/matrix_raw_${Z_VALUE}_.txt.gz"

zcat "${CUR_DATA_DIR}/matrix_trans_${Z_VALUE}_.txt.gz" \
    | tools/create_bsddb.py "${CUR_DATA_DIR}/matrix_trans_${Z_VALUE}_.db"

rm -- "${CUR_DATA_DIR}/matrix_trans_${Z_VALUE}_.txt.gz"


##
## Annotate genes
##

# full
# Runs the annotation script once per "ann*" directory found under AN_DIR, in
# sorted order, one directory per invocation (-n1) and one invocation at a
# time (-P1). Names travel NUL-delimited through the pipeline so directory
# names containing blanks, quotes or backslashes reach the script unmodified
# (plain xargs would split or reinterpret them).
find "${AN_DIR}/" -type d -name "ann*" -print0 \
    | sort -z \
    | xargs -0 -n1 -P1 6_annotation/annotate_results_1K.sh

# Removal of the temporary working directory is currently disabled.
# rm -rf -- "${CUR_DATA_DIR}"


